Observed data

Observed log-chlorophyll at representative station in SF Bay Delta region.

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')

# flow data, left moving window average of 30 days
data(sf_fldat)
fl_dat <- sf_fldat %>% 
  rename(date = Date) %>% 
  filter(station %in% 'sac') %>% 
  mutate(
    qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
  )
  
# format the data to model
data(sf_dat)
sf_mod <- sf_dat %>% 
  filter(Site_Code %in% 'C3') %>% 
  rename(date = Date) %>% 
  mutate(
    doy = yday(date), 
    dec_time = decimal_date(date), 
    yr = year(date),
    mo = month(date, label = T)
  ) %>% 
  left_join(fl_dat, by = 'date') %>% 
  mutate( # all variables ln-transformed
    flo = log(qsm), 
    chl = log(chl), 
    tss = log(tss), 
    nh = log(nh)
    ) %>% 
  select(-q, -qsm, -station, -Latitude, -Longitude, -Location)

# plot, all
p <- ggplot(sf_mod, aes(x = date, y = chl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sf_mod, aes(x = yr, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sf_mod, aes(x = mo, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Adding nitrogen and turbidity covariates

# formula for best annual, seasonal, flow model
strt <- best2$formula %>% 
  as.character

smths <- c(
  "s(nh, bs = 'tp')",  
  "s(tss, bs = 'tp')",
  "te(nh, tss, bs = c('tp', 'tp'))"
)

# get all combinations of smoothers to model, one to many
frmstab <- list()
frms <- list()
for(i in seq_along(smths)){
  
  # for the summary table  
  frmtab <- combn(smths, i) %>%
    apply(2, function(x){
      paste(x, collapse = ' + ')
      })
  
  # for the model
  frm <- sapply(frmtab, function(x){  
        paste('chl ~', strt[3], '+', x) %>% 
          formula
        })
  
  frmstab <- c(frmstab, frmtab)
  frms <- c(frms, frm)
 
}

# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
  
  gam(frm, 
    knots = list(doy = c(1, 366)),
    data = sf_mod, 
    na.action = na.exclude
  )

})
names(mods3) <- paste0('mod', seq_along(mods3))

Summary of all nutrient, tss models:

# smoother stats of GAMs
map(mods3, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable(digits = 2)
name smoother edf Ref.df F p.value
mod1 s(dec_time) 6.73 7.79 7.44 0.00
mod1 s(flo) 3.10 3.96 3.09 0.01
mod1 te(flo,dec_time) 11.10 16.00 1.74 0.00
mod1 te(dec_time,doy,flo) 36.51 75.00 3.74 0.00
mod1 s(nh) 1.00 1.00 10.76 0.00
mod2 s(dec_time) 6.76 7.81 3.57 0.00
mod2 s(flo) 4.56 5.61 0.52 0.81
mod2 te(flo,dec_time) 6.31 16.00 0.87 0.00
mod2 te(dec_time,doy,flo) 29.49 75.00 3.27 0.00
mod2 s(tss) 4.46 5.53 6.33 0.00
mod3 s(dec_time) 7.16 8.16 3.02 0.00
mod3 s(flo) 1.00 1.00 1.01 0.31
mod3 te(flo,dec_time) 6.71 16.00 0.92 0.00
mod3 te(dec_time,doy,flo) 40.93 75.00 3.68 0.00
mod3 te(nh,tss) 3.00 3.00 10.28 0.00
mod4 s(dec_time) 7.08 8.10 3.04 0.00
mod4 s(flo) 1.00 1.00 1.06 0.30
mod4 te(flo,dec_time) 6.88 16.00 0.92 0.00
mod4 te(dec_time,doy,flo) 41.24 75.00 3.82 0.00
mod4 s(nh) 1.00 1.00 1.68 0.20
mod4 s(tss) 1.00 1.00 20.71 0.00
mod5 s(dec_time) 7.16 8.15 2.83 0.00
mod5 s(flo) 1.00 1.00 0.81 0.37
mod5 te(flo,dec_time) 6.37 16.00 0.82 0.00
mod5 te(dec_time,doy,flo) 40.99 75.00 3.69 0.00
mod5 s(nh) 2.94 3.68 0.61 0.68
mod5 te(nh,tss) 2.29 20.00 0.85 0.00
mod6 s(dec_time) 6.87 7.93 2.91 0.00
mod6 s(flo) 1.00 1.00 0.55 0.46
mod6 te(flo,dec_time) 6.81 16.00 0.90 0.00
mod6 te(dec_time,doy,flo) 40.70 75.00 3.65 0.00
mod6 s(tss) 3.80 4.79 6.06 0.00
mod6 te(nh,tss) 0.17 20.00 0.01 0.26
mod7 s(dec_time) 7.08 8.10 3.04 0.00
mod7 s(flo) 1.00 1.00 1.06 0.30
mod7 te(flo,dec_time) 6.88 16.00 0.92 0.00
mod7 te(dec_time,doy,flo) 41.24 75.00 3.82 0.00
mod7 s(nh) 1.00 1.00 1.68 0.20
mod7 s(tss) 1.00 1.00 20.71 0.00
mod7 te(nh,tss) 0.00 16.00 0.00 1.00
# summary stats of GAMs
map(mods3, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  mutate(smth_added = frmstab) %>%
  select(name, smth_added, everything()) %>% 
  kable(digits = 2)
name smth_added AIC R2
mod1 s(nh, bs = ‘tp’) 868.68 0.59
mod2 s(tss, bs = ‘tp’) 854.28 0.59
mod3 te(nh, tss, bs = c(‘tp’, ‘tp’)) 843.63 0.60
mod4 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) 842.25 0.60
mod5 s(nh, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 845.49 0.60
mod6 s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 843.72 0.60
mod7 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 842.25 0.60

Summary stats of best three three models:

# best model with season, year, flow
best3 <- map(mods3, ~ summary(.x)$r.sq) %>% 
  unlist %>% 
  which.max %>% 
  mods3[[.]] 

best <- list(best1 = best1, best2 = best2, best3 = best3)

# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable(digits = 2)
name smoother edf Ref.df F p.value
best1 s(dec_time) 7.33 8.31 9.28 0.00
best1 s(doy) 3.39 8.00 5.28 0.00
best1 te(dec_time,doy) 6.64 15.00 1.42 0.00
best2 s(dec_time) 6.45 7.55 4.65 0.00
best2 s(flo) 3.20 4.05 1.93 0.09
best2 te(flo,dec_time) 11.20 16.00 1.85 0.00
best2 te(dec_time,doy,flo) 37.05 75.00 3.69 0.00
best3 s(dec_time) 7.08 8.10 3.04 0.00
best3 s(flo) 1.00 1.00 1.06 0.30
best3 te(flo,dec_time) 6.88 16.00 0.92 0.00
best3 te(dec_time,doy,flo) 41.24 75.00 3.82 0.00
best3 s(nh) 1.00 1.00 1.68 0.20
best3 s(tss) 1.00 1.00 20.71 0.00
best3 te(nh,tss) 0.00 16.00 0.00 1.00
# summary stats of GAMs
map(best, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable(digits = 2)
name AIC R2
best1 971.92 0.48
best2 880.84 0.59
best3 842.25 0.60
# predictions
sf_res3 <- map(best, function(x){
  sf_mod %>% 
    mutate(
      pred = predict(x)
    )
  }) %>% 
  enframe('mods') %>% 
  unnest

# plot
p <- ggplot(sf_res3, aes(x = date)) + 
  geom_point(data = sf_mod, aes(y = chl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)